`include "defines.v"
// Icache: top-level wiring of the three-stage instruction-cache pipeline.
//   s1      : registers the fetch request, drives the MMU translation port
//             and issues the SRAM read.
//   s2      : hit check, refill bypass, replacement-way selection, valid/PLRU
//             metadata.
//   s3      : response to the instruction buffer, memory request and refill.
//   DataRam : data + tag SRAM arrays (4 ways x 2 banks) behind an arbiter.
// This module contains no logic of its own; it only connects the blocks.
module Icache(
  input clk,
  input rst_n,
  input flush,
  input stall,
  // input internaFlush,
  input invalidate,
  // request port 
  input                    ifu_icache_addr_valid_i,
  output                   icache_ifu_addr_ready_o,
  input  [`VADDR_W-1:0]    ifu_icache_vaddr_i     ,
  input  [`BPU_PRE_W-1:0]  ifu_icache_predict_i   ,
  // MMU address translation
  output                 icache_mmu_trans_valid_o,
  input                  mmu_icache_trans_ready_i,
  output [`VADDR_W-1:0]  icache_mmu_trans_vaddr_o,
  input  [`PADDR_W-1:0]  mmu_icache_trans_paddr_i,
  // resp port
  output [4-1:0]           icache_ibf_data_valid_o,
  input                    ibf_icache_data_ready_i,
  output [`VADDR_W*4-1:0]  icache_ibf_vaddr_o     ,
  output [32*4-1:0]        icache_ibf_instr_o     ,
  output [4-1:0]           icache_ibf_usePtr_o    ,
  output [`BPU_PRE_W-1:0]  icache_ibf_predict_o   ,
  // mem port
  output                 icache_mem_paddr_valid_o,
  output [`PADDR_W-1:0]  icache_mem_paddr_o      ,
  input                  mem_icache_data_valid_i ,
  input  [511:0]         mem_icache_data_i        
);

/* stage 1 -> stage 2 */
wire                  s1_s2_valid   ;
wire                  s2_s1_ready   ;
wire [`PADDR_W-1:0]   s1_s2_paddr   ;
wire [`VADDR_W-1:0]   s1_s2_vaddr   ;
wire [`BPU_PRE_W-1:0] s1_s2_predict ;
/* stage 1 -> SRAM read port */
wire                  s1_sram_valid ;
wire                  sram_s1_ready ;
wire [5:0]            s1_sram_index ;
/* stage 2 -> stage 3 */
wire                  s2_S3_valid      ;
wire                  s3_s2_ready      ;
wire                  s2_s3_hit        ;
wire [0:3]            s2_s3_replaceWay ;
wire [32*4-1:0]       s2_s3_instr      ;
wire [`PADDR_W-1:0]   s2_s3_paddr      ;
wire [`VADDR_W-1:0]   s2_s3_vaddr      ;
wire [`BPU_PRE_W-1:0] s2_s3_predict    ;
/* SRAM read data -> stage 2 (dout0..3 / pTagOut0..3 = bank 0, 4..7 = bank 1) */
wire [511:0]         sram_s2_dout0    ;
wire [511:0]         sram_s2_dout1    ;
wire [511:0]         sram_s2_dout2    ;
wire [511:0]         sram_s2_dout3    ;
wire [511:0]         sram_s2_dout4    ;
wire [511:0]         sram_s2_dout5    ;
wire [511:0]         sram_s2_dout6    ;
wire [511:0]         sram_s2_dout7    ;

wire [18:0]          pTagOut0;
wire [18:0]          pTagOut1;
wire [18:0]          pTagOut2;
wire [18:0]          pTagOut3;
wire [18:0]          pTagOut4;
wire [18:0]          pTagOut5;
wire [18:0]          pTagOut6;
wire [18:0]          pTagOut7;
/* stage 3 -> stage 2 metadata write / bypass */
wire                 s3_s2_bank       ;
wire [0:3]           s3_s2_meta_wen   ;
wire [6:0]           s3_s2_meta_index ;
wire [18:0]          s3_s2_meta_data  ;
/* stage 3 -> SRAM write port */
wire                 s3_sram_valid        ;
wire                 sram_s3_ready        ;
wire [6:0]           s3_sram_index        ;
wire [0:3]           s3_sram_wen          ;
wire [511:0]         s3_sram_din          ;

// Physical address split as used by the stages below:
//   tag = paddr[`PADDR_W-1:13] | bank = paddr[12] | index = paddr[11:6] | byte = paddr[5:0]
// Metadata per way: one valid bit (kept in IcacheStage2) + the tag (kept in tag SRAM).

IcacheStage1 s1(
  .clk  (clk  ),
  .rst_n(rst_n),
  .flush(flush),
  .stall(stall),
  // in
  .icache_s1_addr_valid_i ( ifu_icache_addr_valid_i ),
  .s1_icache_addr_ready_o ( icache_ifu_addr_ready_o ),
  .icache_s1_vaddr_i      ( ifu_icache_vaddr_i      ),
  .icache_s1_predict_i    ( ifu_icache_predict_i    ),
  // out
  .s1_s2_valid_o   ( s1_s2_valid   ),
  .s2_s1_ready_i   ( s2_s1_ready   ),
  .s1_s2_paddr_o   ( s1_s2_paddr   ),
  .s1_s2_vaddr_o   ( s1_s2_vaddr   ),
  .s1_s2_predict_o ( s1_s2_predict ),
  // MMU port
  .s1_icache_trans_valid_o (icache_mmu_trans_valid_o ),
  .icache_s1_trans_valid_i (mmu_icache_trans_ready_i ),
  .s1_icache_trans_vaddr_o (icache_mmu_trans_vaddr_o ),
  .icache_s1_trans_paddr_i (mmu_icache_trans_paddr_i ),
  // SRAM port 
  .s1_sram_valid_o ( s1_sram_valid ),
  .sram_s1_ready_i ( sram_s1_ready ),
  .s1_sram_index_o ( s1_sram_index )
);

IcacheStage2 s2(
  .clk  (clk  ),
  .rst_n(rst_n),
  .flush(flush),
  .stall(stall),
  .invalidate(invalidate),
  // s1 in
  .s1_s2_valid_i   ( s1_s2_valid   ),
  .s2_s1_ready_o   ( s2_s1_ready   ),
  .s1_s2_paddr_i   ( s1_s2_paddr   ),
  .s1_s2_vaddr_i   ( s1_s2_vaddr   ),
  .s1_s2_predict_i ( s1_s2_predict ),
  // s2 out
  .s2_S3_valid_o( s2_S3_valid ),
  .s3_s2_ready_i( s3_s2_ready ),
  .s2_s3_hit_o  ( s2_s3_hit   ),
  .s2_s3_instr_o( s2_s3_instr ),
  .s2_s3_paddr_o( s2_s3_paddr ),
  .s2_s3_vaddr_o( s2_s3_vaddr ),
  .s2_s3_replaceWay_o ( s2_s3_replaceWay  ), 
  .s2_s3_predict_o    ( s2_s3_predict     ),
  // sram in (the SRAM read-valid input is tied high here)
  .sram_s2_valid_i (1'b1),
  .sram_s2_dout0 ( sram_s2_dout0 ),
  .sram_s2_dout1 ( sram_s2_dout1 ),
  .sram_s2_dout2 ( sram_s2_dout2 ),
  .sram_s2_dout3 ( sram_s2_dout3 ),
  .sram_s2_dout4 ( sram_s2_dout4 ),
  .sram_s2_dout5 ( sram_s2_dout5 ),
  .sram_s2_dout6 ( sram_s2_dout6 ),
  .sram_s2_dout7 ( sram_s2_dout7 ),
  .sram_s2_pTag0 ( pTagOut0      ),
  .sram_s2_pTag1 ( pTagOut1      ),
  .sram_s2_pTag2 ( pTagOut2      ),
  .sram_s2_pTag3 ( pTagOut3      ),
  .sram_s2_pTag4 ( pTagOut4      ),
  .sram_s2_pTag5 ( pTagOut5      ),
  .sram_s2_pTag6 ( pTagOut6      ),
  .sram_s2_pTag7 ( pTagOut7      ),
  // s3 meta write port && bypass
  .s3_s2_bank_i       ( s3_s2_bank       ),
  .s3_s2_meta_wen_i   ( s3_s2_meta_wen   ),
  .s3_s2_meta_index_i ( s3_s2_meta_index ),
  .s3_s2_meta_data_i  ( s3_s2_meta_data  ),
  .s3_sram_data_i     ( s3_sram_din      ) 
);

IcacheStage3 s3(
  .clk  (clk  ),
  .rst_n(rst_n),
  .flush(flush),
  .stall(stall),
  .internaFlush(1'b0),
  // s2 in (note: s3_s2_ready_i is an OUTPUT of IcacheStage3 despite its suffix)
  .s2_S3_valid_i ( s2_S3_valid ),
  .s3_s2_ready_i ( s3_s2_ready ),
  .s2_s3_hit_i   ( s2_s3_hit   ),
  .s2_s3_instr_i ( s2_s3_instr ),
  .s2_s3_paddr_i ( s2_s3_paddr ),
  .s2_s3_vaddr_i ( s2_s3_vaddr ),
  .s2_s3_replaceWay_i ( s2_s3_replaceWay ), 
  .s2_s3_predict_i    ( s2_s3_predict    ), 
  // resp port
  .s3_icache_data_valid_o ( icache_ibf_data_valid_o ),
  .icache_s3_data_ready_i ( ibf_icache_data_ready_i ),
  .s3_icache_vaddr_o      ( icache_ibf_vaddr_o      ),
  .s3_icache_instr_o      ( icache_ibf_instr_o      ),
  .s3_icache_usePtr_o     ( icache_ibf_usePtr_o     ),
  .s3_icache_predict_o    ( icache_ibf_predict_o    ),
  // mem port
  .s3_icache_paddr_valid_o ( icache_mem_paddr_valid_o ),
  .s3_icache_paddr_o       ( icache_mem_paddr_o       ),
  .icache_s3_data_valid_i  ( mem_icache_data_valid_i  ),
  .icache_s3_data_i        ( mem_icache_data_i        ),
  // sram port 
  .s3_sram_valid_o ( s3_sram_valid ),  
  .sram_s3_ready_i ( sram_s3_ready ),
  .s3_sram_index_o ( s3_sram_index ),
  .s3_sram_wen_o   ( s3_sram_wen   ),
  .s3_sram_din_o   ( s3_sram_din   ),
  // meta write port
  .s3_s2_bank_o       ( s3_s2_bank       ),
  .s3_s2_meta_wen_o   ( s3_s2_meta_wen   ),
  .s3_s2_meta_index_o ( s3_s2_meta_index ),
  .s3_s2_meta_data_o  ( s3_s2_meta_data  )
);

Sram4x2ArrayHasArbiter DataRam(
  .clk(clk),
  .rst_n(rst_n),
  // 1 write 
  .wr_valid ( s3_sram_valid   ),
  .wr_ready ( sram_s3_ready   ),
  .wen      ( s3_sram_wen     ),
  .wr_index ( s3_sram_index   ),
  .din      ( s3_sram_din     ),
  .pTagin   ( s3_s2_meta_data ),
  
  // read: one index, all 4 ways of both banks are returned
  .rd_valid ( s1_sram_valid ),
  .rd_ready ( sram_s1_ready ), 
  .rd_index ( s1_sram_index ),

  .dout0    ( sram_s2_dout0 ),
  .dout1    ( sram_s2_dout1 ),
  .dout2    ( sram_s2_dout2 ),
  .dout3    ( sram_s2_dout3 ),
  .dout4    ( sram_s2_dout4 ),
  .dout5    ( sram_s2_dout5 ),
  .dout6    ( sram_s2_dout6 ),
  .dout7    ( sram_s2_dout7 ),

  .pTagOut0 ( pTagOut0      ),
  .pTagOut1 ( pTagOut1      ),
  .pTagOut2 ( pTagOut2      ),
  .pTagOut3 ( pTagOut3      ),
  .pTagOut4 ( pTagOut4      ),
  .pTagOut5 ( pTagOut5      ),
  .pTagOut6 ( pTagOut6      ),
  .pTagOut7 ( pTagOut7      )
);
endmodule


// IcacheStage1: first pipeline stage of the instruction cache.
//   * latches the incoming fetch request (virtual address + prediction info),
//   * presents the latched virtual address on the translation port,
//   * issues the SRAM read using bits [11:6] of the translated physical address,
//   * forwards the request to stage 2 once translation is valid and the SRAM
//     read has been accepted.
// The `stall` input is not referenced inside this module.
module IcacheStage1(
  input clk,
  input rst_n,
  // control 
  input flush,
  input stall,
  // in
  input                    icache_s1_addr_valid_i,
  output                   s1_icache_addr_ready_o,
  input  [`VADDR_W-1:0]    icache_s1_vaddr_i     ,
  input  [`BPU_PRE_W-1:0]  icache_s1_predict_i   ,
  // out
  output                   s1_s2_valid_o  ,
  input                    s2_s1_ready_i  ,
  output [`PADDR_W-1:0]    s1_s2_paddr_o  ,
  output [`VADDR_W-1:0]    s1_s2_vaddr_o  ,
  output [`BPU_PRE_W-1:0]  s1_s2_predict_o,
  // MMU port
  output                 s1_icache_trans_valid_o,
  input                  icache_s1_trans_valid_i,
  output [`VADDR_W-1:0]  s1_icache_trans_vaddr_o,
  input  [`PADDR_W-1:0]  icache_s1_trans_paddr_i,
  // SRAM port 
  output                 s1_sram_valid_o,
  input                  sram_s1_ready_i,
  output [5:0]           s1_sram_index_o
);

// ---------------- request registers ----------------
reg                   s1Valid;    // a request is held in this stage
reg [`VADDR_W-1:0]    vaddr_q;    // latched virtual address
reg [`BPU_PRE_W-1:0]  predict_q;  // latched branch-prediction payload

// ---------------- handshake terms ----------------
// Held request whose translation is reported valid this cycle.
wire translated   = s1Valid & icache_s1_trans_valid_i;
// Upstream request accepted this cycle.
wire accept_req   = icache_s1_addr_valid_i & s1_icache_addr_ready_o;
// SRAM read request accepted this cycle.
wire sram_granted = s1_sram_valid_o & sram_s1_ready_i;
// Request handed over to stage 2 this cycle.
wire issue_to_s2  = s1_s2_valid_o & s2_s1_ready_i;

// A new request may enter when the stage is empty or is draining into
// stage 2 in this very cycle, and never while a flush is asserted.
assign s1_icache_addr_ready_o = ~flush & (issue_to_s2 | ~s1Valid);

always @(posedge clk or negedge rst_n) begin
  if (!rst_n) begin
    s1Valid <= 1'b0;
  end else if (flush) begin
    s1Valid <= 1'b0;
  end else if (s1_icache_addr_ready_o) begin
    s1Valid <= icache_s1_addr_valid_i;
  end
end

// Payload registers have no reset; they are only meaningful while s1Valid is set.
always @(posedge clk) begin
  if (accept_req) begin
    predict_q <= icache_s1_predict_i;
    vaddr_q   <= icache_s1_vaddr_i;
  end
end

// ---------------- translation port ----------------
assign s1_icache_trans_vaddr_o = vaddr_q;
assign s1_icache_trans_valid_o = s1Valid;

// ---------------- SRAM read request ----------------
assign s1_sram_index_o = icache_s1_trans_paddr_i[11:6];
assign s1_sram_valid_o = translated;

// ---------------- to stage 2 ----------------
// Valid downstream only when the request is held, translated, and the SRAM
// accepted the read in the same cycle.
assign s1_s2_valid_o   = translated & sram_granted;
assign s1_s2_predict_o = predict_q;
assign s1_s2_vaddr_o   = vaddr_q;
assign s1_s2_paddr_o   = icache_s1_trans_paddr_i;

endmodule

// IcacheStage2: second pipeline stage of the instruction cache.
//   1. hit check against the tag SRAM outputs and the per-way valid bits
//   2. bypass check against the line stage 3 is currently refilling
//   3. selection of the hit way's data and of the 128-bit fetch group
//   4. replacement-way selection (first invalid way, else tree pseudo-LRU)
//   5. clearing of all valid bits / PLRU state on `invalidate`
//   6. valid-bit and PLRU update
//
// Address split used here:
//   tag = q_paddr[`PADDR_W-1:13], bank = q_paddr[12], index = q_paddr[11:6],
//   128-bit group select = q_paddr[5:4].
// s3_s2_meta_index_i is 7 bits wide: bit 6 repeats the bank and bits [5:0]
// are the set index, so only [5:0] is compared against a set number.
// The `stall` input is not referenced inside this module.
module IcacheStage2(
  input clk,
  input rst_n,
  input flush,
  input stall,
  input invalidate,
  // s1 in
  input                    s1_s2_valid_i   ,
  output                   s2_s1_ready_o   ,
  input  [`PADDR_W-1:0]    s1_s2_paddr_i   ,
  input  [`VADDR_W-1:0]    s1_s2_vaddr_i   ,
  input  [`BPU_PRE_W-1:0]  s1_s2_predict_i ,
  // s2 out
  output                   s2_S3_valid_o     ,
  input                    s3_s2_ready_i     ,
  output                   s2_s3_hit_o       ,
  output [0:3]             s2_s3_replaceWay_o, //onehot
  output [32*4-1:0]        s2_s3_instr_o     ,
  output [`PADDR_W-1:0]    s2_s3_paddr_o     ,
  output [`VADDR_W-1:0]    s2_s3_vaddr_o     ,
  output [`BPU_PRE_W-1:0]  s2_s3_predict_o   ,
  // sram in
  input                  sram_s2_valid_i,
  input  [511:0]         sram_s2_dout0  ,
  input  [511:0]         sram_s2_dout1  ,
  input  [511:0]         sram_s2_dout2  ,
  input  [511:0]         sram_s2_dout3  ,
  input  [511:0]         sram_s2_dout4  ,
  input  [511:0]         sram_s2_dout5  ,
  input  [511:0]         sram_s2_dout6  ,
  input  [511:0]         sram_s2_dout7  ,

  input  [18:0]          sram_s2_pTag0  ,
  input  [18:0]          sram_s2_pTag1  ,
  input  [18:0]          sram_s2_pTag2  ,
  input  [18:0]          sram_s2_pTag3  ,
  input  [18:0]          sram_s2_pTag4  ,
  input  [18:0]          sram_s2_pTag5  ,
  input  [18:0]          sram_s2_pTag6  ,
  input  [18:0]          sram_s2_pTag7  ,

  // s3 meta write port && bypass
  input                  s3_s2_bank_i,
  input  [0:3]           s3_s2_meta_wen_i  ,
  input  [6:0]           s3_s2_meta_index_i,
  input  [18:0]          s3_s2_meta_data_i ,
  input  [511:0]         s3_sram_data_i    
);

// reg [63:0] hitCNT;
// reg [63:0] instCNT;
// always@(posedge clk or negedge rst_n)begin
//   if(~rst_n)begin
//     hitCNT  <= 'd0;
//     instCNT <= 'd0;
//   end else begin
//     hitCNT  <= hitCNT  + (s2_S3_valid_o & s3_s2_ready_i & s2_s3_hit_o);
//     instCNT <= instCNT + (s2_S3_valid_o & s3_s2_ready_i);
//   end
//   if(instCNT[8:0]=='d0)begin
//     $fwrite(32'h8000_0001, "Icache hit ratio is %d / %d = %f \n", hitCNT ,instCNT , ((hitCNT *10000) / instCNT));
//     $fflush();
//   end
// end
reg s2Valid;
reg [`PADDR_W-1:0]  q_paddr;
reg [`VADDR_W-1:0]  q_vaddr;
reg [`BPU_PRE_W-1:0]q_predict;
reg hitSave;   // hit result captured for a request that could not leave at once
reg saveFlag;  // set while the captured result must be used instead of live SRAM data

wire s1_s2_hs = s2_s1_ready_o && s1_s2_valid_i;
wire s2_s3_hs = s2_S3_valid_o && s3_s2_ready_i;
// Ready when the stage is empty or is handing its request to stage 3.
assign s2_s1_ready_o = ~s2Valid || s2_s3_hs;
// Valid when a request is held and the SRAM data is flagged valid.
assign s2_S3_valid_o =  s2Valid && sram_s2_valid_i;

always@(posedge clk or negedge rst_n)
  if(~rst_n)
    s2Valid <= 1'b0;
  else if (flush)
    s2Valid <= 1'b0;
  else if(s2_s1_ready_o)
    s2Valid <= s1_s2_valid_i;

always@(posedge clk )begin
  if(s1_s2_hs)begin
    q_paddr   <= s1_s2_paddr_i  ;
    q_vaddr   <= s1_s2_vaddr_i  ;
    q_predict <= s1_s2_predict_i; 
  end
end
assign s2_s3_paddr_o   = q_paddr   ;
assign s2_s3_vaddr_o   = q_vaddr   ;
assign s2_s3_predict_o = q_predict ;

// Per-way, per-bank valid bits and per-bank PLRU state, one entry per set.
reg meta0Bank0V [0:63] ;reg meta0Bank1V [0:63] ;
reg meta1Bank0V [0:63] ;reg meta1Bank1V [0:63] ;
reg meta2Bank0V [0:63] ;reg meta2Bank1V [0:63] ;
reg meta3Bank0V [0:63] ;reg meta3Bank1V [0:63] ;
reg [2:0] plruBank0 [0:63]; // {B0, B1, B2}
reg [2:0] plruBank1 [0:63]; // {B0, B1, B2}

/************ hit check ***********************/
// The index is forced to 0 while no request is held.
wire [5:0] index = {6{s2Valid}} & q_paddr[11:6];
wire [0:3] hit_vector;
wire bankSel = q_paddr[12];
// Set index of the line stage 3 is writing (bit 6 of the port is the bank).
wire [5:0] refill_index = s3_s2_meta_index_i[5:0];

assign hit_vector[0]  = bankSel ? (meta0Bank1V[index] & (sram_s2_pTag4 == q_paddr[`PADDR_W-1:13])) 
                                : (meta0Bank0V[index] & (sram_s2_pTag0 == q_paddr[`PADDR_W-1:13]));
assign hit_vector[1]  = bankSel ? (meta1Bank1V[index] & (sram_s2_pTag5 == q_paddr[`PADDR_W-1:13])) 
                                : (meta1Bank0V[index] & (sram_s2_pTag1 == q_paddr[`PADDR_W-1:13]));
assign hit_vector[2]  = bankSel ? (meta2Bank1V[index] & (sram_s2_pTag6 == q_paddr[`PADDR_W-1:13])) 
                                : (meta2Bank0V[index] & (sram_s2_pTag2 == q_paddr[`PADDR_W-1:13]));
assign hit_vector[3]  = bankSel ? (meta3Bank1V[index] & (sram_s2_pTag7 == q_paddr[`PADDR_W-1:13])) 
                                : (meta3Bank0V[index] & (sram_s2_pTag3 == q_paddr[`PADDR_W-1:13]));

// The line being written by stage 3 is the one this stage is looking up.
// Only the low 6 bits of the refill index are compared; comparing the full
// 7-bit value against the 6-bit `index` could never match for bank 1.
wire   bypass_hit     = (bankSel == s3_s2_bank_i) && (s3_s2_meta_data_i == q_paddr[`PADDR_W-1:13]) && |s3_s2_meta_wen_i && (index == refill_index) ;
assign s2_s3_hit_o =   (saveFlag && ~bypass_hit) ? hitSave : (|hit_vector || bypass_hit) ;
// Pick the four ways of the addressed bank, then the hit way's line.
wire [511:0] dout0 = bankSel ? sram_s2_dout4 : sram_s2_dout0;
wire [511:0] dout1 = bankSel ? sram_s2_dout5 : sram_s2_dout1;
wire [511:0] dout2 = bankSel ? sram_s2_dout6 : sram_s2_dout2;
wire [511:0] dout3 = bankSel ? sram_s2_dout7 : sram_s2_dout3;

wire [511:0] hit_data = ({512{hit_vector[0]}} & dout0 )| 
                        ({512{hit_vector[1]}} & dout1 )| 
                        ({512{hit_vector[2]}} & dout2 )| 
                        ({512{hit_vector[3]}} & dout3 );

wire [511:0] out_data = bypass_hit ? s3_sram_data_i : hit_data;

// Select the 128-bit (four-instruction) group addressed by q_paddr[5:4].
wire [32*4-1:0] s2_s3_instr  = ({(32*4){( q_paddr[5:4] == 2'd0 )}} & out_data[128*1-1:128*0])|
                               ({(32*4){( q_paddr[5:4] == 2'd1 )}} & out_data[128*2-1:128*1])|
                               ({(32*4){( q_paddr[5:4] == 2'd2 )}} & out_data[128*3-1:128*2])|
                               ({(32*4){( q_paddr[5:4] == 2'd3 )}} & out_data[128*4-1:128*3]);

reg [32*4-1:0] instrSave ;
reg s1_s2_hs_r;   // s1->s2 handshake delayed by one cycle
always@(posedge clk or negedge rst_n)
  if(~rst_n)
    saveFlag <= 1'b0;
  else if(flush)
    saveFlag <= 1'b0;
  else if(s1_s2_hs_r && ~s2_s3_hs && s2Valid)
    saveFlag <= 1'b1;
  else if(s2_s3_hs)
    saveFlag <= 1'b0;

always@(posedge clk or negedge rst_n)
  if(~rst_n)
    s1_s2_hs_r <= 1'b0;
  else
    s1_s2_hs_r <= s1_s2_hs;

// Capture the lookup result in the cycle after the request entered, and
// again whenever a refill bypass matches the held request.
always@(posedge clk or negedge rst_n)
  if(~rst_n)begin
    instrSave <= 'd0;
    hitSave <= 'd0;
  end else if(s1_s2_hs_r || bypass_hit)begin
    instrSave <= s2_s3_instr;
    hitSave  <= bypass_hit || |hit_vector;
  end

// If the request could not be passed on in its first cycle, use the saved copy.
assign s2_s3_instr_o = (saveFlag && ~bypass_hit ) ? instrSave : s2_s3_instr;

/************ find replace way bank0 ***********************/
wire [0:3] invalid_victorBank0 = ~{meta0Bank0V[index],meta1Bank0V[index],meta2Bank0V[index],meta3Bank0V[index]};
wire invalidBank0 = |invalid_victorBank0;
// PLRU tree (used only when all four ways are valid; otherwise an invalid
// way is chosen):
//          B0 = plru[2]
//        /             \
//   B1 = plru[1]    B2 = plru[0]
//    /    \           /    \
//  way0  way1       way2  way3
// B0 = 0 selects the left pair, B1/B2 = 0 selects the left way of the pair.
wire [0:3] plru_wayBank0 = { ~plruBank0[index][2] & ~plruBank0[index][1] ,
                             ~plruBank0[index][2] &  plruBank0[index][1] ,
                              plruBank0[index][2] & ~plruBank0[index][0] ,
                              plruBank0[index][2] &  plruBank0[index][0] };

wire [0:3] dec_wayBank0 = invalidBank0 ? invalid_victorBank0 : plru_wayBank0;
reg [0:3]replace_wayBank0 ;
// Reduce a possibly multi-hot candidate vector to one-hot; the highest
// numbered candidate way wins because later assignments override earlier ones.
always@(*)begin
  replace_wayBank0 = 4'b1000;
  if(dec_wayBank0[0]) replace_wayBank0 = 4'b1000;
  if(dec_wayBank0[1]) replace_wayBank0 = 4'b0100;
  if(dec_wayBank0[2]) replace_wayBank0 = 4'b0010;
  if(dec_wayBank0[3]) replace_wayBank0 = 4'b0001;
end
/************ find replace way bank1 ***********************/
wire [0:3] invalid_victorBank1 = ~{meta0Bank1V[index],meta1Bank1V[index],meta2Bank1V[index],meta3Bank1V[index]};
wire invalidBank1 = |invalid_victorBank1;
// Same tree encoding as bank 0.
wire [0:3] plru_wayBank1 = { ~plruBank1[index][2] & ~plruBank1[index][1] ,
                             ~plruBank1[index][2] &  plruBank1[index][1] ,
                              plruBank1[index][2] & ~plruBank1[index][0] ,
                              plruBank1[index][2] &  plruBank1[index][0] };

wire [0:3] dec_wayBank1 = invalidBank1 ? invalid_victorBank1 : plru_wayBank1;

reg [0:3]replace_wayBank1 ;
always@(*)begin
  replace_wayBank1 = 4'b1000;
  if(dec_wayBank1[0]) replace_wayBank1 = 4'b1000;
  if(dec_wayBank1[1]) replace_wayBank1 = 4'b0100;
  if(dec_wayBank1[2]) replace_wayBank1 = 4'b0010;
  if(dec_wayBank1[3]) replace_wayBank1 = 4'b0001;
end

assign s2_s3_replaceWay_o = bankSel ? replace_wayBank1 : replace_wayBank0;
/************ update meta ***********************/
// Returns the PLRU state after an access to the way flagged in `sel`
// (sel[0] = way 0 ... sel[3] = way 3): the tree bits on the path to that way
// are pointed away from it, the other subtree's bit is kept. An all-zero
// `sel` returns the state unchanged.
// The return range and the [2:0] input range are required: without a return
// range the function result is a single bit, and a [0:2] input would reverse
// the bit order with respect to plru_wayBank0/1.
function [2:0] UpdatePlru;
  input [0:3] sel;
  input [2:0] plruVal;
  begin
    UpdatePlru = sel[0] ? {1'b1 , 1'b1 ,  plruVal[0]} :
                 sel[1] ? {1'b1 , 1'b0 ,  plruVal[0]} :
                 sel[2] ? {1'b0 , plruVal[1] ,  1'b1} :
                 sel[3] ? {1'b0 , plruVal[1] ,  1'b0} : plruVal;
  end
endfunction

genvar i ;
generate 
  for(i=0;i<64;i=i+1)begin : meta_set
    // Stage 3 is writing set i of bank 0 / bank 1.
    wire refillSet   = (refill_index == i);
    wire refillBank0 = refillSet && (s3_s2_bank_i == 1'b0);
    wire refillBank1 = refillSet && (s3_s2_bank_i == 1'b1);
    // The request held in this stage addresses set i of bank 0 / bank 1.
    wire lookupSet   = s2Valid && (index == i);
    wire lookupBank0 = lookupSet && (bankSel == 1'b0);
    wire lookupBank1 = lookupSet && (bankSel == 1'b1);
    // Each PLRU source is applied only to the set it actually refers to.
    wire [0:3] fillSelBank0 = refillBank0 ? s3_s2_meta_wen_i : 4'b0000;
    wire [0:3] fillSelBank1 = refillBank1 ? s3_s2_meta_wen_i : 4'b0000;
    wire [0:3] hitSelBank0  = lookupBank0 ? hit_vector       : 4'b0000;
    wire [0:3] hitSelBank1  = lookupBank1 ? hit_vector       : 4'b0000;

    /*************** bank0 valid bits ******************/
    always@(posedge clk or negedge rst_n)
      if(~rst_n)begin
        meta0Bank0V[i]   <= 1'b0;
        meta1Bank0V[i]   <= 1'b0;
        meta2Bank0V[i]   <= 1'b0;
        meta3Bank0V[i]   <= 1'b0;
      end else if(invalidate)begin
        meta0Bank0V[i]   <= 1'b0;
        meta1Bank0V[i]   <= 1'b0;
        meta2Bank0V[i]   <= 1'b0;
        meta3Bank0V[i]   <= 1'b0;
      end else if(refillBank0)begin
        if(s3_s2_meta_wen_i[0]) meta0Bank0V[i] <= 1'b1;
        if(s3_s2_meta_wen_i[1]) meta1Bank0V[i] <= 1'b1;
        if(s3_s2_meta_wen_i[2]) meta2Bank0V[i] <= 1'b1;
        if(s3_s2_meta_wen_i[3]) meta3Bank0V[i] <= 1'b1;
      end

    /*************** bank1 valid bits ******************/
    always@(posedge clk or negedge rst_n)
      if(~rst_n)begin
        meta0Bank1V[i]   <= 1'b0;
        meta1Bank1V[i]   <= 1'b0;
        meta2Bank1V[i]   <= 1'b0;
        meta3Bank1V[i]   <= 1'b0;
      end else if(invalidate)begin
        meta0Bank1V[i]   <= 1'b0;
        meta1Bank1V[i]   <= 1'b0;
        meta2Bank1V[i]   <= 1'b0;
        meta3Bank1V[i]   <= 1'b0;
      end else if(refillBank1)begin
        if(s3_s2_meta_wen_i[0]) meta0Bank1V[i] <= 1'b1;
        if(s3_s2_meta_wen_i[1]) meta1Bank1V[i] <= 1'b1;
        if(s3_s2_meta_wen_i[2]) meta2Bank1V[i] <= 1'b1;
        if(s3_s2_meta_wen_i[3]) meta3Bank1V[i] <= 1'b1;
      end

    /*************** PLRU state ******************/
    // The refill update is applied first, then the hit update on top of it.
    always@(posedge clk or negedge rst_n)
      if(~rst_n)
        plruBank0[i] <= 3'd0;
      else if(invalidate)
        plruBank0[i] <= 3'd0;
      else if(lookupBank0 || refillBank0)
        plruBank0[i] <= UpdatePlru(hitSelBank0,UpdatePlru(fillSelBank0,plruBank0[i]));

    always@(posedge clk or negedge rst_n)
      if(~rst_n)
        plruBank1[i] <= 3'd0;
      else if(invalidate)
        plruBank1[i] <= 3'd0;
      else if(lookupBank1 || refillBank1)
        plruBank1[i] <= UpdatePlru(hitSelBank1,UpdatePlru(fillSelBank1,plruBank1[i]));
  end
endgenerate
endmodule

// IcacheStage3: third pipeline stage of the instruction cache.
//   1. responds to the instruction buffer with up to four instructions
//   2. on a miss, requests the line from memory and refills it
//      (data/tag SRAM write plus metadata write towards stage 2)
//   3. bypasses the refill data straight to the response port
//   4. drains an outstanding memory request after a flush (flushRuning)
//
// NOTE: `s3_s2_ready_i` is an OUTPUT (ready towards stage 2) despite its
// `_i` suffix; the name is kept because it is part of the module interface.
// The `stall` input is not referenced inside this module.
module IcacheStage3(
  input clk,
  input rst_n,
  input flush,
  input stall,
  input internaFlush,
  // s2 in   
  input                   s2_S3_valid_i     ,
  output                  s3_s2_ready_i     ,
  input                   s2_s3_hit_i       ,
  input  [0:3]            s2_s3_replaceWay_i, //onehot
  input  [32*4-1:0]       s2_s3_instr_i     ,
  input  [`PADDR_W-1:0]   s2_s3_paddr_i     ,
  input  [`VADDR_W-1:0]   s2_s3_vaddr_i     ,
  input  [`BPU_PRE_W-1:0] s2_s3_predict_i   ,
  // resp port
  output [4-1:0]          s3_icache_data_valid_o,
  input                   icache_s3_data_ready_i,
  output [`VADDR_W*4-1:0] s3_icache_vaddr_o     ,
  output [4-1:0]          s3_icache_usePtr_o    ,
  output [32*4-1:0]       s3_icache_instr_o     ,
  output [`BPU_PRE_W-1:0] s3_icache_predict_o   ,
  // mem port
  output                 s3_icache_paddr_valid_o,
  output [`PADDR_W-1:0]  s3_icache_paddr_o      ,
  input                  icache_s3_data_valid_i ,
  input  [511:0]         icache_s3_data_i       ,
  // sram port 
  output                 s3_sram_valid_o,  
  input                  sram_s3_ready_i,
  output [6:0]           s3_sram_index_o,
  output [0:3]           s3_sram_wen_o  ,
  output [511:0]         s3_sram_din_o  ,
  // meta write port
  output                 s3_s2_bank_o,
  output [0:3]           s3_s2_meta_wen_o,
  output [6:0]           s3_s2_meta_index_o,
  output [18:0]          s3_s2_meta_data_o
);

reg [2:0] cur_state;
reg [2:0] nxt_state;
reg s3Valid , flushRuning;
wire [32*4-1:0] bypass_data;
// One-hot refill state machine encoding.
parameter IDLE   = 3'b001,
          READ   = 3'b010,
          REFILL = 3'b100;
wire bypass;   // refill completes this cycle: forward the memory data
reg  bypassR;  // a bypassed response is still waiting to be accepted
wire s3_icache_hs = |s3_icache_data_valid_o && icache_s3_data_ready_i;
wire s2_s3_hs     = s3_s2_ready_i && s2_S3_valid_i;
// Accept a new request when no flushed access is still draining and the
// stage is empty or its response is being accepted this cycle.
assign s3_s2_ready_i = ~(flushRuning) && (~s3Valid || s3_icache_hs);

reg                  q_hit        ;
reg [0:3]            q_replaceWay ;
reg [32*4-1:0]       q_instr      ;
reg [`PADDR_W-1:0]   q_paddr      ;
reg [`VADDR_W-1:0]   q_vaddr      ;
reg [`BPU_PRE_W-1:0] q_predict    ;


always@(posedge clk or negedge rst_n)
  if(~rst_n)
    s3Valid <= 1'b0;
  else if(flush || internaFlush)
    s3Valid <= 1'b0;
  else if(s3_s2_ready_i)
    s3Valid <= s2_S3_valid_i;

// Set when a flush arrives while a memory access is in progress; cleared
// when the state machine returns to IDLE.
always@(posedge clk or negedge rst_n)
  if(~rst_n)
    flushRuning <= 1'b0;
  else if((cur_state == READ) && (nxt_state == IDLE)) // cancellation complete
    flushRuning <= 1'b0;
  else if((cur_state == REFILL) && (nxt_state == IDLE)) // cancellation complete
    flushRuning <= 1'b0;
  else if(flush && (cur_state != IDLE) )
    flushRuning <= 1'b1;

always@(posedge clk or negedge rst_n)
  if(~rst_n)
    bypassR <= 1'b0;
  else if(s3_icache_hs | flush | internaFlush)
    bypassR <= 1'b0;
  else if(s3Valid && bypass )
    bypassR <= 1'b1;

always@(posedge clk)begin
    if(s2_s3_hs)begin
      q_hit        <=  s2_s3_hit_i        ;
      q_replaceWay <=  s2_s3_replaceWay_i ;
      q_paddr      <=  s2_s3_paddr_i      ;
      q_vaddr      <=  s2_s3_vaddr_i      ;
      // On a handshake load the instructions from stage 2; otherwise, in the
      // bypass cycle, load the refill data so it can be replayed later.
      q_instr      <=  s2_s3_instr_i;
      q_predict    <=  s2_s3_predict_i;
    end else if(bypass) begin
      q_instr      <=  bypass_data  ;
    end 
end

/* response towards the instruction buffer */
// Valid when (1) the held request hit, or (2) refill data is being bypassed
// (and no flushed access is draining), or (3) a bypassed response is pending.
wire   ctrlValid = ((q_hit && s3Valid) || (bypass && ~flushRuning) || bypassR );
// Slot k of the 4-instruction group is valid when k >= q_paddr[3:2].
assign s3_icache_data_valid_o[0] = ctrlValid & ((q_paddr[3:2] == 2'd0) );
assign s3_icache_data_valid_o[1] = ctrlValid & ((q_paddr[3:2] == 2'd0) || (q_paddr[3:2] == 2'd1) );
assign s3_icache_data_valid_o[2] = ctrlValid & ((q_paddr[3:2] == 2'd0) || (q_paddr[3:2] == 2'd1) || (q_paddr[3:2] == 2'd2) );
assign s3_icache_data_valid_o[3] = ctrlValid ;

// Only the slot addressed by q_paddr[3:2] carries q_paddr[1]; others are 0.
assign s3_icache_usePtr_o[0] = (q_paddr[3:2] == 2'd0) ? q_paddr[1] : 1'b0;
assign s3_icache_usePtr_o[1] = (q_paddr[3:2] == 2'd1) ? q_paddr[1] : 1'b0;
assign s3_icache_usePtr_o[2] = (q_paddr[3:2] == 2'd2) ? q_paddr[1] : 1'b0;
assign s3_icache_usePtr_o[3] = (q_paddr[3:2] == 2'd3) ? q_paddr[1] : 1'b0;
                                                                                              
// Per-slot virtual addresses: 16-byte aligned base plus 0/4/8/12.
assign s3_icache_vaddr_o[`VADDR_W*1-1:`VADDR_W*0] = {q_vaddr[`VADDR_W-1:4],4'b0000} ;
assign s3_icache_vaddr_o[`VADDR_W*2-1:`VADDR_W*1] = {q_vaddr[`VADDR_W-1:4],4'b0100} ;
assign s3_icache_vaddr_o[`VADDR_W*3-1:`VADDR_W*2] = {q_vaddr[`VADDR_W-1:4],4'b1000} ;
assign s3_icache_vaddr_o[`VADDR_W*4-1:`VADDR_W*3] = {q_vaddr[`VADDR_W-1:4],4'b1100} ;

assign s3_icache_instr_o      = bypass ? bypass_data : q_instr ;
assign s3_icache_predict_o    = q_predict;

always@(posedge clk or negedge rst_n)
  if(~rst_n)
    cur_state <= IDLE;
  else  
    cur_state <= nxt_state;

always@(*)begin
  case(cur_state)
    IDLE: // a valid request that missed, has no pending bypassed response, and is not being flushed
      if(s3Valid && ~q_hit && ~bypassR && ~flush)
        nxt_state = READ;
      else
        nxt_state = IDLE;
    READ:
      // Flushed (now or earlier) and memory returns data: drop it, back to IDLE.
      if((flush || flushRuning) && icache_s3_data_valid_i) 
        nxt_state = IDLE;
      // Normal access: memory returned data, move on to the refill.
      else if(icache_s3_data_valid_i)
        nxt_state = REFILL;
      else
        nxt_state = READ;
    REFILL:
      // Wait until the SRAM accepts the write.
      if(sram_s3_ready_i)
        nxt_state = IDLE;
      else
        nxt_state = REFILL;
    default:nxt_state=IDLE;
  endcase
end
// mem req
assign s3_icache_paddr_valid_o = (cur_state == READ);
assign s3_icache_paddr_o       = q_paddr;
assign bypass                  = (cur_state == REFILL) && (nxt_state == IDLE); // bypass only in the cycle the refill completes

// Select the 128-bit group addressed by q_paddr[5:4] from the memory line.
// (Literals are 2 bits wide to match the compared field.)
assign bypass_data  = 
  ({(32*4){( q_paddr[5:4] == 2'd0 )}} & icache_s3_data_i[128*1-1:128*0])|
  ({(32*4){( q_paddr[5:4] == 2'd1 )}} & icache_s3_data_i[128*2-1:128*1])|
  ({(32*4){( q_paddr[5:4] == 2'd2 )}} & icache_s3_data_i[128*3-1:128*2])|
  ({(32*4){( q_paddr[5:4] == 2'd3 )}} & icache_s3_data_i[128*4-1:128*3]);

// sram data write (memory data is used directly as write data during REFILL)
assign s3_sram_valid_o = (cur_state == REFILL) ;
assign s3_sram_index_o = q_paddr[12:6];   // {bank, set index}
assign s3_sram_wen_o   = {4{s3_sram_valid_o}} & q_replaceWay ;
assign s3_sram_din_o   = icache_s3_data_i;
// meta write
assign s3_s2_bank_o       = q_paddr[12] ;
assign s3_s2_meta_wen_o   = s3_sram_wen_o;
assign s3_s2_meta_index_o = s3_sram_index_o;
assign s3_s2_meta_data_o  = q_paddr[`PADDR_W-1:13];
endmodule

// Sram4x2ArrayHasArbiter: data and tag SRAM arrays for 4 ways x 2 banks,
// sharing one address between a write requester and a read requester.
//   * write port: `wen` selects the way(s), wr_index[6] selects the bank and
//     wr_index[5:0] the row; `din` / `pTagin` are written to the data / tag
//     macros of the selected way and bank.
//   * read port : `rd_index` addresses every macro at once; dout0..3 /
//     pTagOut0..3 come from bank 0 (ways 0..3), dout4..7 / pTagOut4..7 from
//     bank 1.
// A request is granted (ready high) in the cycle after the arbiter state
// moves to its owner.
module Sram4x2ArrayHasArbiter(
  input clk,
  input rst_n,
  // 1 write 
  input  wr_valid,
  output wr_ready,
  input [0:3]   wen,
  input [6:0]   wr_index,
  input [511:0] din,
  input [18:0]  pTagin,
  
  // 4 read
  input  rd_valid,
  output rd_ready, 
  input  [5:0] rd_index,
  output [511:0] dout0,
  output [511:0] dout1,
  output [511:0] dout2,
  output [511:0] dout3,
  output [511:0] dout4,
  output [511:0] dout5,
  output [511:0] dout6,
  output [511:0] dout7,
  output [18:0]  pTagOut0,
  output [18:0]  pTagOut1,
  output [18:0]  pTagOut2,
  output [18:0]  pTagOut3,
  output [18:0]  pTagOut4,
  output [18:0]  pTagOut5,
  output [18:0]  pTagOut6,
  output [18:0]  pTagOut7
);

// Arbiter (one-hot state): NONE = idle, USR1 = read granted, USR2 = write granted.
// From NONE and USR1 a pending write wins; from USR2 a pending read wins, so
// the two requesters alternate when both keep requesting.
parameter NONE = 3'b001,
          USR1 = 3'b010,
          USR2 = 3'b100;
reg [2:0] state;
always@(posedge clk or negedge rst_n)
  if(~rst_n)
    state <= NONE;
  else begin
    case(state)
      NONE:
        if(wr_valid)
          state <= USR2;
        else if(rd_valid)
          state <= USR1;
      USR1:
        if(wr_valid)
          state <= USR2;
        else if(rd_valid)
          state <= USR1;
        else
          state <= NONE;
      USR2:
        if(rd_valid)
          state <= USR1;
        else if(wr_valid)
          state <= USR2;
        else
          state <= NONE;
      default:state<=NONE;
    endcase
  end

assign wr_ready = (state == USR2);
assign rd_ready = (state == USR1);
 
// Shared row address: the write row while the writer is granted, else the read row.
wire [5:0] sram_addr = (state == USR2) ? wr_index[5:0] : rd_index; 

wire [511:0] dout [0:7];
// Tag read data, 19 bits to match pTagin and pTagOut0..7.
wire [18:0]  pTag [0:7];
genvar i;
generate  
  for(i=0;i<4;i=i+1)begin : sram_bank_array
    // CEN is driven high while idle and WEN is driven low only for the
    // selected way/bank during a granted write, i.e. both are used as
    // active-low controls.
    // NOTE(review): BWEN is tied to all ones; the macro's BWEN polarity is
    // not visible in this file - confirm all-ones enables every bit.
    IcacheData64x512bit DATA_BANK0(
      .CLK (clk),
      .CEN (state == NONE),
      .WEN (~(wen[i] && (state == USR2) && ~wr_index[6])),
      .BWEN(~512'h0),
      .A(sram_addr),
      .D(din), 
      .Q(dout[i])
    );
    IcacheData64x512bit DATA_BANK1(
      .CLK (clk),
      .CEN (state == NONE),
      .WEN (~(wen[i] && (state == USR2) && wr_index[6])),
      .BWEN(~512'h0),
      .A(sram_addr),
      .D(din), 
      .Q(dout[i+4])
    );

    IcachePtag64x19bit TAG_BANK0(
      .CLK (clk),
      .CEN (state == NONE),
      .WEN (~(wen[i] && (state == USR2) && ~wr_index[6])),
      .BWEN(~19'h0),
      .A(sram_addr),
      .D(pTagin), 
      .Q(pTag[i])
    );
    IcachePtag64x19bit TAG_BANK1(
        .CLK (clk),
        .CEN (state == NONE),
        .WEN (~(wen[i] && (state == USR2) && wr_index[6])),
        .BWEN(~19'h0),
        .A(sram_addr),
        .D(pTagin), 
        .Q(pTag[i+4])
    );

  end
endgenerate

assign dout0 = dout[0];
assign dout1 = dout[1];
assign dout2 = dout[2];
assign dout3 = dout[3];
assign dout4 = dout[4];
assign dout5 = dout[5];
assign dout6 = dout[6];
assign dout7 = dout[7];

assign pTagOut0 = pTag[0];
assign pTagOut1 = pTag[1];
assign pTagOut2 = pTag[2];
assign pTagOut3 = pTag[3];
assign pTagOut4 = pTag[4];
assign pTagOut5 = pTag[5];
assign pTagOut6 = pTag[6];
assign pTagOut7 = pTag[7];
endmodule